/********************************************************************************
* 3ie - ART home delivery: 							
*
* 	Created: 22 November 2017
* 	Last updated: 17 February 2018 
*
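* 	This file:
* 		1. Runs the analysis of the viral load (VL) data, the instrumental
*		   variable regressions, and the analysis of the healthcare
*		   expenditure data.
*
* 	Sections:
*		1. Load data
*		2. Clean factor variables
*		3. Analysis of VL data
*		4. Instrumental variable regression
*		5. Analysis of healthcare expenditure data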

*******************************************************************************/


* Start from an empty session and switch off -more- pauses so the file runs unattended.
clear all
set more off

/******************************************
		Load data
******************************************/


* Folder that holds the datasets. Replace the placeholder before running;
* the same global is used again when the expenditure data are loaded.
global serverpath  "enter pathname here"

* Fail fast with an actionable message if the path has not been set correctly,
* rather than leaving the user with a bare file-not-found error from -use-.
capture confirm file "$serverpath/3ieforanalysis_2018-04-30.dta"
if _rc {
	display as error "File not found: $serverpath/3ieforanalysis_2018-04-30.dta"
	display as error "Set the global macro serverpath to the folder that contains the data."
	exit 601
}

use "$serverpath/3ieforanalysis_2018-04-30.dta", clear

/******************************************
		Clean factor variables
******************************************/
* For each factor variable in turn: drop its value labels, then shift the stored
* codes down by one.
* NOTE(review): presumably this turns 1/2 codes into 0/1 indicators - confirm
* against the coding in the source dataset.
foreach fvar in intervfac intervrec ltfu male {
	_strip_labels `fvar'
	replace `fvar' = `fvar' - 1
}

/******************************************
		Analysis of VL data
******************************************/


********** WHOLE SAMPLE
* Every model is a GLM with binomial family and log link, reported in
* exponentiated form, with standard errors clustered on facility_id.
* Each model is estimated once at the default confidence level; the 90% interval
* is then obtained by replaying the stored results (glm, eform level(90)) rather
* than running the identical estimation a second time.

// Model 1: Completely unadjusted 
glm vlfail_end_num intervfac, fam(bin) link(log) eform cluster(facility_id) 
glm, eform level(90)


// Model 2: Model 1 + adjustment for baseline vl/cd4
* needs difficult search because otherwise won't converge
glm vlfail_end_num intervfac vlorcd4fail_base_num, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)


// Model 3: Model 2 + sex and age 
glm vlfail_end_num intervfac vlorcd4fail_base_num age_clean male_num, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)


** For appendix only
* NOTE(review): the appendix models below build on Model 2, not Model 3 - they
* do not include age_clean or male_num. The labels now describe what the code
* estimates; confirm that omitting age and sex here is intended.

// Model 4: Model 2 + adjustment for follow-up period 
glm vlfail_end_num intervfac vlorcd4fail_base_num futime, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)


// Model 5: Model 2 + adjustment for time between baseline vl/cd4 and endline vl
glm vlfail_end_num intervfac vlorcd4fail_base_num timebetweenvls, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)


// Model 6: Model 2 + adjustment for both futime and time between baseline vl/cd4 and endline vl
glm vlfail_end_num intervfac vlorcd4fail_base_num futime timebetweenvls, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)



********** Restricting to those stable on ART at baseline 
* Sample restriction: vlorcd4fail_base_num==0.
* Each model is estimated once at the default confidence level; the 90% interval
* is obtained by replaying the stored results (glm, eform level(90)) instead of
* repeating the identical estimation.

// Model 1: Completely unadjusted 
glm vlfail_end_num intervfac if vlorcd4fail_base_num==0, fam(bin) link(log) eform cluster(facility_id) 
glm, eform level(90)

// Model 2: Model 1 + adjustment for follow-up period (pair removed to help with convergence)
glm vlfail_end_num intervfac futime if vlorcd4fail_base_num==0, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 3: Model 1 + adjustment for time between baseline vl/cd4 and endline vl (pair removed to help with convergence)
glm vlfail_end_num intervfac timebetweenvls if vlorcd4fail_base_num==0, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 4: Model 2 + adjustment for time between baseline vl/cd4 and endline vl (pair removed to help with convergence)
glm vlfail_end_num intervfac timebetweenvls futime if vlorcd4fail_base_num==0, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 5: Model 4 + male_num and age_clean (pair removed to help with convergence)
glm vlfail_end_num intervfac futime timebetweenvls male_num age_clean if vlorcd4fail_base_num==0, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)



********** Restricting to those whose endline viral load was at least 200 days after enrolment into the trial 
* Sample restriction: futime>=200.
* Each model is estimated once at the default confidence level; the 90% interval
* is obtained by replaying the stored results (glm, eform level(90)) instead of
* repeating the identical estimation.

// Model 1: Completely unadjusted 
glm vlfail_end_num intervfac if futime>=200, fam(bin) link(log) eform cluster(facility_id) 
glm, eform level(90)

// Model 2: Model 1 + adjustment for baseline vl/cd4
* needs difficult search because otherwise won't converge
glm vlfail_end_num intervfac vlorcd4fail_base_num if futime>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 3: Model 2 + adjustment for follow-up period 
glm vlfail_end_num intervfac vlorcd4fail_base_num futime if futime>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 4: Model 2 + adjustment for time between baseline vl/cd4 and endline vl
glm vlfail_end_num intervfac vlorcd4fail_base_num timebetweenvls if futime>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 5: Model 3 + adjustment for time between baseline vl/cd4 and endline vl 
glm vlfail_end_num intervfac vlorcd4fail_base_num futime timebetweenvls if futime>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 6: Model 5 + age_clean and male_num (pair removed to help with convergence)
glm vlfail_end_num intervfac vlorcd4fail_base_num futime timebetweenvls age_clean male_num if futime>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)



********** Restricting to those whose endline viral load was at least 200 days after the baseline viral load 
* Sample restriction: timebetweenvls>=200.
* Each model is estimated once at the default confidence level; the 90% interval
* is obtained by replaying the stored results (glm, eform level(90)) instead of
* repeating the identical estimation.

// Model 1: Completely unadjusted 
glm vlfail_end_num intervfac if timebetweenvls>=200, fam(bin) link(log) eform cluster(facility_id) 
glm, eform level(90)

// Model 2: Model 1 + adjustment for baseline vl/cd4
* needs difficult search because otherwise won't converge
glm vlfail_end_num intervfac vlorcd4fail_base_num if timebetweenvls>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 3: Model 2 + adjustment for follow-up period 
glm vlfail_end_num intervfac vlorcd4fail_base_num futime if timebetweenvls>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 4: Model 2 + adjustment for time between baseline vl/cd4 and endline vl
glm vlfail_end_num intervfac vlorcd4fail_base_num timebetweenvls if timebetweenvls>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 5: Model 3 + adjustment for time between baseline vl/cd4 and endline vl 
glm vlfail_end_num intervfac vlorcd4fail_base_num futime timebetweenvls if timebetweenvls>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)

// Model 6: Model 5 + age_clean and male_num (pair removed to help with convergence)
glm vlfail_end_num intervfac vlorcd4fail_base_num futime timebetweenvls age_clean male_num if timebetweenvls>=200, fam(bin) link(log) eform cluster(facility_id) difficult search 
glm, eform level(90)






********** Restricting to those whose endline viral load was at least 200 days after enrolment into the trial and who were stable at baseline
* Sample restriction: vlorcd4fail_base_num==0 and futime>=200.
* Binomial-family, log-link GLMs in exponentiated form, standard errors clustered
* on facility_id. Only the default confidence level is reported in this section.
// NOTE: pair is not adjusted for here because many pairs would be dropped

// Model 1: no covariates besides the intervention indicator
glm vlfail_end_num intervfac ///
	if vlorcd4fail_base_num==0 & futime>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id)

// Model 2: Model 1 + follow-up period (pair left out to help convergence)
glm vlfail_end_num intervfac futime ///
	if vlorcd4fail_base_num==0 & futime>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id) difficult search

// Model 3: Model 2 + time between baseline vl/cd4 and endline vl (pair left out to help convergence)
glm vlfail_end_num intervfac futime timebetweenvls ///
	if vlorcd4fail_base_num==0 & futime>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id) difficult search

// Model 4: Model 3 + male_num and age_clean (pair left out to help convergence)
glm vlfail_end_num intervfac futime timebetweenvls male_num age_clean ///
	if vlorcd4fail_base_num==0 & futime>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id) difficult search



********** Restricting to those whose endline viral load was at least 200 days after the baseline viral load and who were stable at baseline 
* Sample restriction: vlorcd4fail_base_num==0 and timebetweenvls>=200.
* Binomial-family, log-link GLMs in exponentiated form, standard errors clustered
* on facility_id. Only the default confidence level is reported in this section.
// NOTE: pair is not adjusted for because many pairs drop out

// Model 1: no covariates besides the intervention indicator
glm vlfail_end_num intervfac ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id)

// Model 2: Model 1 + follow-up period (pair left out to help convergence)
glm vlfail_end_num intervfac futime ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id) difficult search

// Model 3: Model 2 + time between baseline vl/cd4 and endline vl (pair left out to help convergence)
glm vlfail_end_num intervfac futime timebetweenvls ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id) difficult search

// Model 4: Model 3 + age_clean and male_num (pair left out to help convergence)
glm vlfail_end_num intervfac futime timebetweenvls age_clean male_num ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	family(binomial) link(log) eform vce(cluster facility_id) difficult search





/******************************************
		 Instrumental variable regression
******************************************/

********** WHOLE SAMPLE
* ivreg2 with intervrec_num as the endogenous regressor, instrumented by
* intervfac; standard errors clustered on facility_id.

// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num = intervfac), ///
	cluster(facility_id)


// Model 2: Model 1 + baseline vl/cd4
ivreg2 vlfail_end_num vlorcd4fail_base_num ///
	(intervrec_num = intervfac), ///
	cluster(facility_id)


// Model 3: Model 2 + age and sex
ivreg2 vlfail_end_num vlorcd4fail_base_num age_clean male_num ///
	(intervrec_num = intervfac), ///
	cluster(facility_id)


* The four groups below repeat the same three IV specifications with a different
* endogenous regressor (intervrec_num_onlygrt90 or intervrec_num_onlygrt180),
* first on the whole sample and then restricted to timebetweenvls>=200.
* The instrument is intervfac throughout; standard errors are clustered on facility_id.

********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=90
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt90 = intervfac), ///
	cluster(facility_id)


// Model 2: Model 1 + baseline vl/cd4
ivreg2 vlfail_end_num vlorcd4fail_base_num ///
	(intervrec_num_onlygrt90 = intervfac), ///
	cluster(facility_id)


// Model 3: Model 2 + age and sex
ivreg2 vlfail_end_num vlorcd4fail_base_num age_clean male_num ///
	(intervrec_num_onlygrt90 = intervfac), ///
	cluster(facility_id)


********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=180
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt180 = intervfac), ///
	cluster(facility_id)


// Model 2: Model 1 + baseline vl/cd4
ivreg2 vlfail_end_num vlorcd4fail_base_num ///
	(intervrec_num_onlygrt180 = intervfac), ///
	cluster(facility_id)


// Model 3: Model 2 + age and sex
ivreg2 vlfail_end_num vlorcd4fail_base_num age_clean male_num ///
	(intervrec_num_onlygrt180 = intervfac), ///
	cluster(facility_id)


********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=90 & timebetweenvls >=200
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if timebetweenvls>=200, ///
	cluster(facility_id)


// Model 2: Model 1 + baseline vl/cd4
ivreg2 vlfail_end_num vlorcd4fail_base_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if timebetweenvls>=200, ///
	cluster(facility_id)


// Model 3: Model 2 + age and sex
ivreg2 vlfail_end_num vlorcd4fail_base_num age_clean male_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if timebetweenvls>=200, ///
	cluster(facility_id)


********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=180 & timebetweenvls >=200
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if timebetweenvls>=200, ///
	cluster(facility_id)


// Model 2: Model 1 + baseline vl/cd4
ivreg2 vlfail_end_num vlorcd4fail_base_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if timebetweenvls>=200, ///
	cluster(facility_id)


// Model 3: Model 2 + age and sex
ivreg2 vlfail_end_num vlorcd4fail_base_num age_clean male_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if timebetweenvls>=200, ///
	cluster(facility_id)



* All models in the groups below are restricted to vlorcd4fail_base_num==0, so
* the baseline vl/cd4 indicator is not used as a covariate. The instrument is
* intervfac throughout; standard errors are clustered on facility_id.

********** only if stable at baseline
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num = intervfac) ///
	if vlorcd4fail_base_num==0, ///
	cluster(facility_id)


// Model 2: Model 1 + age and sex
ivreg2 vlfail_end_num age_clean male_num ///
	(intervrec_num = intervfac) ///
	if vlorcd4fail_base_num==0, ///
	cluster(facility_id)


********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=90 and if stable at baseline
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if vlorcd4fail_base_num==0, ///
	cluster(facility_id)


// Model 2: Model 1 + age and sex
ivreg2 vlfail_end_num age_clean male_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if vlorcd4fail_base_num==0, ///
	cluster(facility_id)


********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=180 and if stable at baseline
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if vlorcd4fail_base_num==0, ///
	cluster(facility_id)


// Model 2: Model 1 + age and sex
ivreg2 vlfail_end_num age_clean male_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if vlorcd4fail_base_num==0, ///
	cluster(facility_id)



********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=90 and if stable at baseline & timebetweenvls >=200
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	cluster(facility_id)


// Model 2: Model 1 + age and sex
ivreg2 vlfail_end_num age_clean male_num ///
	(intervrec_num_onlygrt90 = intervfac) ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	cluster(facility_id)




********** ONLY COUNTS THOSE AS HAVING RECEIVED INTERVENTION IF timeonarthomedel >=180 and if stable at baseline and timebetweenvls >=200
// Model 1: no exogenous covariates
ivreg2 vlfail_end_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	cluster(facility_id)


// Model 2: Model 1 + age and sex
ivreg2 vlfail_end_num age_clean male_num ///
	(intervrec_num_onlygrt180 = intervfac) ///
	if vlorcd4fail_base_num==0 & timebetweenvls>=200, ///
	cluster(facility_id)




/******************************************
		Analysis of healthcare expenditure data
******************************************/
* Replace the data in memory with the endline cost dataset.
use "$serverpath/endlinecostdat_2017-12-09.dta", clear

* Drop the value labels on intervfac and shift its codes down by one.
_strip_labels intervfac
replace intervfac = intervfac - 1

// Descriptive statistics of todcost within each level of intervfac
bysort intervfac: summarize todcost, detail
* NOTE(review): from Stata 14 the documented form is -ci means varname-;
* confirm this line runs on the Stata version in use.
bysort intervfac: ci todcost

// Inference on the mean: ritest on the intervfac coefficient from a linear
// regression with pair indicators; 10000 replications, fixed seed
ritest intervfac _b[intervfac], ///
	cluster(facility_clean) strata(pair) rep(10000) seed(53757): ///
	regress todcost intervfac i.pair, cluster(facility_clean)

// Inference on the median: same ritest settings around qreg2.
// Pair indicators are generated explicitly as pair_* and passed as regressors.
tabulate pair, generate(pair_)
ritest intervfac _b[intervfac], ///
	cluster(facility_clean) strata(pair) rep(10000) seed(53757): ///
	qreg2 todcost intervfac pair_*, cluster(facility_clean)


